;TODO: tf tb should be in memory

; Vector3: X grows right, Y down, Z forward.
; On the FP stack and in memory it looks like {Y X Z} (sometimes I need only Y).

; ---------------------------------------------------------------------------
; Start-up: switch to the graphics mode and program the palette.
; 16-bit DOS .COM code. Several instructions in this file are also read as
; data (see `dw 12` here and the BIG label further down), so instruction
; order and encodings must not be changed.
; ---------------------------------------------------------------------------
org 100h ; assume al=0 bx=0 sp=di=-2 si=0100h bp=09??h
  dw 12        ;=0C 00  or al,0 -- runs as a no-op and is also the constant 12 read through [si]

; S(x): address label x relative to si (si=100h) with a forced 8-bit displacement.
%define S(x) [byte x + si-100h]

  mov al,13h   ; mode number for the first int 10h below
  dec di       ; di = -3 under the start-up assumption above
DI_ equ -3     ; pixel_adr@di = -3
P:int 10h      ; video mode, set palette index: bx=i dh=R ch=G cl=B
               ; 1st pass: al=13h -> set video mode (needs ah=0 on entry;
               ;   NOTE(review): ah is not listed in the assumptions above).
               ; later passes: ax=1010h -> set palette entry bx from the
               ;   dh/ch/cl values computed during the previous pass.

  mov al,bl
  cbw          ; ah = 00h when bl<128, FFh when bl>=128
  xor al,ah    ; 0..127 127..0
;  shrd dx,ax,9
  mov dh,al
  shr dh,1     ; dh = ramp/2
  mov ch,dh    ; ch = ramp/2

  mul al       ; ax = ramp*ramp
  or bl,bl     ; sf = bit 7 of bl
  jns S        ; bl<128: keep ah = high byte of ramp^2, dh = ramp/2
  xchg ax,dx   ; bl>=128: swap, so ah = ramp/2 and dh = high byte of ramp^2
S:mov cl,ah
  shr cl,1
  add cl,[si]  ; cl = ah/2 + 12 (low byte of the `dw 12` at 100h)
  
  mov ax,1010h ; int 10h function used by all following passes
  inc bx
  jnz P        ; repeat until bx wraps around to 0

; ---------------------------------------------------------------------------
; Per-frame setup. Entered with cx = frame counter T, si = 100h and di = -3
; (the memory operands below are written relative to di using DI_ = -3).
; Loads es/dx, then builds 12 plane normals as 32-bit floats, 100h bytes
; apart, at [bp+100h*k + 100h .. +10Bh] for k = 1..12.
; ---------------------------------------------------------------------------
;Each frame: the visible pixels are A0000..AF9FF, I want X=0 Y=0 in the center
M:mov dx,0xA000-10-20-20-4 ;=0x9fca
  mov es,dx    ; dx:bx=YX:XX = 0x9fca:0  (the same value is the segment used by stosb)

;Generate gem normals to p0..p12=[bp+200h,300h,...].
  pusha  ; adr:   -18 -16 -14 -12 -10  -8  -6  -4  -2
         ; stack:  di  si  bp  sp  bx  dx  cx  ax   0
         ; data:   -3 100 9??  -2  0  9fca T  key
         ; The saved cx (address -6) and saved si (address -16) are read
         ; back as integers by the rotation loop below.
  mov cx,[si]  ; cx = 12 planes
G:add bp,si    ; i@cx = 12...1; bp points to p[12-i]; carry=0
  pusha        ; keep i (cx) and the advanced bp; cx is reused below
  fninit       ; clear FP stack

;Dodecahedron (with unit normals).
  fld1
  fsincos      ;|cos(1) sin(1)
  fldz         ;|0 cos(1) sin(1)
N:test cl,1    ; also clears cf, which loop R relies on
  jnz K
  fchs         ; negate st0 on passes where the counter is even
K:fstp st3     ; move st0 below the other two values (cyclic rotation of the triple)
  loop N       ; i passes -> a different sign/order combination per plane; cx=0 afterwards

;Do a bunch of rotations. (It doesn't need to be fast.)
  mov cl,24    ; cx = 24 rotation steps
; Each step runs the block R..jc twice (cf toggles 0->1->0); the two comment
; columns show the FP stack during the first and the second pass.
R:fld st2            ;|z y x z                     ;|x sz y x cz
  fild word[-6+di-DI_]    ; saved cx = T (absolute address -6, di-relative for a short encoding)
BIG: ;=30174  -- the first two bytes of the fidiv below (DE 75h), read as a word elsewhere
  fidiv word[-16+di-DI_]  ;|t=T/256   (saved si = 100h = 256)
  fsincos            ;|c=cos(t) s=sin(t) z y x z   ;|c s x sz y x cz
  fmulp st5          ;|s z y x cz                  ;|s x sz y cx cz
  fmulp              ;|sz y x cz                   ;|sx sz y cx cz
  cmc
  jc R               ; first pass: cf=1 -> run the block once more
  fsubp st4          ;|sz y cx cz-sx
  faddp st2          ;|y cx+sz cz-sx
  fstp st3           ;|new.x=cx+sz .y=cz-sx .z=y
  loop R
STORE:               ; write the three components as 32-bit floats (si=100h)
  fstp dword[bp+si]
  fstp dword[bp+si+4]
  fstp dword[bp+si+8]
  popa         ; restore i (cx) and the bp of this plane
  loop G
  popa         ; restore the registers saved at the top of the frame

; ---------------------------------------------------------------------------
; Per-pixel loop. dx:bx is a 32-bit fixed-point screen position whose bytes
; are read as Y (dx) and X (bh:dl). It advances by 0000CCCDh per pixel;
; 320 steps add 01000040h, i.e. dh grows by one per 320 pixels.
; One frame ends when bx wraps back to exactly 0.
; ---------------------------------------------------------------------------
;Each pixel: cx=T dx:bx=YX:XX(init=9fca:0) di=adr(init=0)
X:inc dx       ; part of "dx:bx += 0x0000CCCD"
X2:
  pusha        ; adr:     -18 -16 -14 -12 -10  -8  -6  -4  -2
  fninit       ; stack:    di  si  bp  sp  bx  dx  cx  ax   0
  mov di,-4    ; s16:  pixadr 100 9??  -2  ..X..Y  T result
               ; di = address of the saved ax slot, used as result/scratch word

;Compute ray direction.
  fild word S(BIG)   ; z = 30174 (opcode bytes at BIG read as a word)
  fild word[di+4-9]  ; x = word at address -9: saved bh (low byte) and saved dl (high byte)
  fild word[di+4-8]  ;|y=Y x=X z=BIG   (y = saved dx)
  call GEM     ; returns brightness in st0 and cf (1 = sky, 0 = ground)
  fistp word[di]; if fistp overflows, it stores 0x8000
  popa         ; color -> pushed ax; also restores everything GEM changed
  salc         ; al = ground ? 0 : 0xff   (al = cf ? 0xff : 0)
  xor al,ah    ; al = high byte of the result, bit-inverted for sky pixels
;  mov al,dl    ; test - show only palette

;; Faster version: draw each pixel twice.
;  stosb
;  add bx,0xCCCD; dx:bx = YXX += 0000CCCD
;  adc dx,0

  stosb        ; store the pixel at es:di, di++
  add bx,0xCCCD; dx:bx = YXX += 0000CCCD
  jnc X2       ; no carry out of bx: dx unchanged
  jnz X        ; do 65536 pixels  (carry and bx!=0: propagate into dx at X)

  inc cx       ; T++
  in al,60h    ; read a byte from port 60h
  dec al
  jnz M        ; next frame unless the byte read was 1
  ret          ; no fallthrough

; ---------------------------------------------------------------------------
; GEM: shade one ray.
; In:   FP stack |rd.y rd.x rd.z (ray direction), si=100h, di=-4,
;       bp = base such that plane k lives at bp+100h*k:
;         [+100h]=n.y  [+104h]=n.x  [+108h]=n.z  (32-bit floats)
;         [-4]       = scratch for the dot product D of this ray
;       word at address -6 = T, word at address -4 = scratch (caller's saved cx/ax).
; Out:  st0 = brightness; cf=1 for sky, cf=0 for checkerboard ground.
; Uses: ax bx cx bp si (the caller restores them with popa).
; Method: clip the ray against the 12 planes, keeping the largest entry
;       distance tf and the smallest exit distance tb; the ray hits when
;       tf<tb after all planes. On a hit the ray is reflected about the
;       entry plane before the environment lookup.
; ---------------------------------------------------------------------------
GEM: ;Hit the gem. The front (entry) plane pointer is kept in bx; the back plane is not recorded.
  fild dword[si]     ;|tb=HUGE y x z   (signed dword at 100h = bytes 0C 00 B0 13 = 13B0000Ch;
                     ;  the earlier note "0xC40013" does not match the current bytes)
  fldz               ;|tf=0 tb=HUGE y x z
  mov cx,[si]  ; i@cx = 12...1; bp points to p[i]

;Ray-plane intersection.
I:add bp,si          ;|tf tb rd.y .x .z   (bp -> next plane, 100h further)
;Dot product.
  add si,[si]  ;108 104 100   (si = 10Ch here; DP subtracts 4 before each use)
DP:add si,di   ;-4
  fld dword[bp+si]   ;|p[i].z ...
  fmul st5           ;|rd.z*p[i].z ...   (st5 is the matching rd component on every pass)
  jpo DP             ;|(rd*p[i]).y .x .z tf tb rd.y .x .z
                     ; pf comes from `add si,di`: low byte 08h and 04h have odd
                     ; parity (loop), 00h has even parity (exit) -> 3 passes, si=100h after
  faddp
  faddp              ;|D=(rd|p[i]) tf tb rd.y .x .z

  fst dword[bp+di]   ; -> p[i].dot_rd   (kept at [bp-4] for the reflection step)
  test [bp+di+2],di; sf=1 if we're in front of the plane
                     ; (upper word of the float D and'ed with FFFCh: sf = sign bit of D)
  fldlg2             ;|pd D tf tb rd.y .x .z              ; pd=0.301
  fadd dword[bp+si+8];|N=pd-(ro|p[i]) D tf tb rd.y .x .z  ; ro.z=-1
  fdivrp st1         ;|t=N/D tf tb rd.y .x .z
  jns BACK           ; flags are still those of the `test` above
FRONT:         ;D<0: front
  fcom st1           ; t ? tf
  fnstsw ax
  sahf               ; cf = (t < tf)
  jb NEXT      ;if t>=tf { tf=t; pf@bx = current; }
  fst st1
  mov bx,bp
  jmp NEXT
BACK:          ;D>=0: back
  fcom st2           ; t ? tb
  fnstsw ax
  sahf               ; cf = (t < tb)
  jnb NEXT     ;if t<tb { tb=t; }
  fst st2
NEXT:
  fstp st0           ;|tf tb rd.y .x .z
  fcom               ; tf ? tb
  fnstsw ax
  sahf         ;if tf>=tb { no_hit: cf=0; early exit } else { cf=1 }
  jnb EXIT
  loop I             ; leaves cf=1 when all 12 planes were processed
EXIT:
  fcompp             ;|rd.y .x .z (get rid of 'tf tb')
  jnc BGD            ; miss: shade the unmodified ray

;Compute the reflection from the gem. reflect(i,n) = i - 2*n*(i|n)
; n is the entry plane recorded in bx, (i|n) is its saved D at [bx-4].
  add si,[si] ;108 104 100
Y:add si,di   ;-4
  fld dword[bx+di]   ;|(rd|pb) rd.y .x .z
  fmul dword[bx+si]  ;|(rd|pb)*pb.z rd.y .x .z
  fadd st0           ;|2*(rd|pb)*pb.z rd.y .x .z
  fsubr st3          ;|R.z=rd.z-2*(rd|pb)*pb.z rd.y .x .z
  jpo Y              ;|(R=i-2*n(i|n)).y R.x R.z rd.y .x .z
                     ; same parity-driven 3-pass loop as DP; si=100h afterwards

;Environment map: checkerboard below, light gradient above.
BGD:                 ;|y x z
  ftst               ; compare y with 0
  fabs               ;|Y=abs(y) x z
                     ; NOTE(review): the status word is read after fabs, so this relies
                     ; on fabs leaving the ftst condition codes unchanged -- confirm.
  fnstsw ax
  sahf         ; if y>0 { checker } else { sky }
  jb SKY       ; the sky is just Y (= y^2 after gamma)   (returns with cf=1)
  fidiv word[si]     ;|Y/12 x z
  fdiv st2,st0
  fdiv st1,st0
  fxch st2           ;|u=12*z/Y v=12*x/Y Y/12

  fistp word[di]     ; u -> scratch word
  mov al,[di]        ; al = low byte of u
  add al,[di+4-6]    ; + low byte of T (word at address -6)
  fistp word[di]     ; v -> scratch word
  xor al,[di]  ; ax = (u+T) xor v   (low bytes only)
  and ax,9     ; keep bits 0 and 3; clears ah and cf
  add al,5     ; tex = (((u+T) xor v) and 9) + 5   (5, 6, 13 or 14; cf stays 0)
  mov [di],ax
  fimul word[di]     ;|tex*Y/12
SKY:
  ret
